| test_eRNA_vs_Peng |
2 |
- results/2018-11-10/test/hg19/IMR_eRNA_overlaps.bed
- results/2018-11-10/test/hg19/GM_eRNA_overlaps.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -a {input.peng} -b {input.edmund} -sorted -u > {output} 2> {log}
|
|
| test_hg19_vs_hg18 |
2 |
- results/2018-11-10/test/IMR_hg19_vs_hg18_eRNA.bed
- results/2018-11-10/test/GM_hg19_vs_hg18_eRNA.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -a {input.hg19} -b {input.hg18} -sorted -u > {output} 2> {log}
|
|
| test_eRNA_vs_liftOver |
2 |
- results/2018-11-10/test/hg19/IMR_eRNA_vs_liftOver.bed
- results/2018-11-10/test/hg19/GM_eRNA_vs_liftOver.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -a {input.eRNA} -b {input.liftOver} -sorted -u > {output} 2> {log}
|
|
| fig_predicted_eRNA_peng |
2 |
- results/2018-10-12/hg19/IMR_eRNA_overlaps.svg
- results/2018-10-12/hg19/GM_eRNA_overlaps.svg
|
docker://continuumio/miniconda3:4.6.14 |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 | """
Takes in 3 files
0 - the left
1 - the right
2 - the overlap
"""
from matplotlib import pyplot as plt
from matplotlib_venn import venn2
import os
def count_lines(path):
    """Return the number of lines in the text file at ``path``."""
    with open(path) as handle:
        return sum(1 for _ in handle)


def venn_subsets(left_total, right_total, shared):
    """Return the ``(left only, right only, shared)`` sizes given to venn2.

    ``left_total`` and ``right_total`` are the line counts of the two set
    files; ``shared`` is the line count of the overlap file.
    """
    return (left_total - shared, right_total - shared, shared)


def main():
    """Draw the two-set Venn diagram for this Snakemake job.

    Inputs 0 and 1 are the two sets and input 2 is their overlap. The title
    is read from ``params["title"]`` and the figure is saved to output 0.
    """
    left_path = snakemake.input[0]
    right_path = snakemake.input[1]
    overlap_path = snakemake.input[2]

    diagram = venn2(
        subsets=venn_subsets(
            count_lines(left_path),
            count_lines(right_path),
            count_lines(overlap_path),
        ),
        # A two-set diagram takes two labels: one per set file.
        set_labels=(
            os.path.basename(os.path.normpath(left_path)),
            os.path.basename(os.path.normpath(right_path)),
        ),
    )

    # matplotlib-venn has no patch (None) for a region of size zero, so an
    # empty region is skipped instead of raising AttributeError.
    for region_id, colour in (("100", "blue"), ("010", "red"), ("110", "purple")):
        patch = diagram.get_patch_by_id(region_id)
        if patch is not None:
            patch.set_color(colour)

    plt.title(snakemake.params["title"])
    plt.savefig(snakemake.output[0])


# Snakemake executes script files as the main program, so the job still runs
# main(); the guard only keeps an import of this file free of side effects.
if __name__ == "__main__":
    main()
|
|
| fig_predicted_eRNA_cross_cell |
1 |
- results/2018-10-12/hg19/eRNA_cross_cell.svg
|
docker://continuumio/miniconda3:4.6.14 |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 | """
Takes in 3 files
0 - the left
1 - the right
2 - the overlap
"""
from matplotlib import pyplot as plt
from matplotlib_venn import venn2
import os
def count_lines(path):
    """Return the number of lines in the text file at ``path``."""
    with open(path) as handle:
        return sum(1 for _ in handle)


def venn_subsets(left_total, right_total, shared):
    """Return the ``(left only, right only, shared)`` sizes given to venn2.

    ``left_total`` and ``right_total`` are the line counts of the two set
    files; ``shared`` is the line count of the overlap file.
    """
    return (left_total - shared, right_total - shared, shared)


def main():
    """Draw the two-set Venn diagram for this Snakemake job.

    Inputs 0 and 1 are the two sets and input 2 is their overlap. The title
    is read from ``params["title"]`` and the figure is saved to output 0.
    """
    left_path = snakemake.input[0]
    right_path = snakemake.input[1]
    overlap_path = snakemake.input[2]

    diagram = venn2(
        subsets=venn_subsets(
            count_lines(left_path),
            count_lines(right_path),
            count_lines(overlap_path),
        ),
        # A two-set diagram takes two labels: one per set file.
        set_labels=(
            os.path.basename(os.path.normpath(left_path)),
            os.path.basename(os.path.normpath(right_path)),
        ),
    )

    # matplotlib-venn has no patch (None) for a region of size zero, so an
    # empty region is skipped instead of raising AttributeError.
    for region_id, colour in (("100", "blue"), ("010", "red"), ("110", "purple")):
        patch = diagram.get_patch_by_id(region_id)
        if patch is not None:
            patch.set_color(colour)

    plt.title(snakemake.params["title"])
    plt.savefig(snakemake.output[0])


# Snakemake executes script files as the main program, so the job still runs
# main(); the guard only keeps an import of this file free of side effects.
if __name__ == "__main__":
    main()
|
|
| eRNA_link_genes |
2 |
- results/2019-08-26/hg19/GM_link_eRNA.bed
- results/2019-08-26/hg19/IMR_link_eRNA.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools window -u -w {params.window} -a {input.eRNA} -b {input.dges} > {output}
|
|
| eRNA_link_area |
1 |
- results/2019-08-26/eRNA_viral.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -wa -a {input.merge} -b {input.ov} > {output}
|
|
| test_report_L2 |
1 |
- results/2019-10-01/L2.tsv
|
docker://continuumio/miniconda3:4.6.14 |
|
|
| fig_linked_eRNA_cross_cell |
1 |
- results/2018-10-01/hg19/eRNA_cross_cell_viral.svg
|
docker://continuumio/miniconda3:4.6.14 |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 | """
Takes in 3 files
0 - the left
1 - the right
2 - the overlap
"""
from matplotlib import pyplot as plt
from matplotlib_venn import venn2
import os
def count_lines(path):
    """Return the number of lines in the text file at ``path``."""
    with open(path) as handle:
        return sum(1 for _ in handle)


def venn_subsets(left_total, right_total, shared):
    """Return the ``(left only, right only, shared)`` sizes given to venn2.

    ``left_total`` and ``right_total`` are the line counts of the two set
    files; ``shared`` is the line count of the overlap file.
    """
    return (left_total - shared, right_total - shared, shared)


def main():
    """Draw the two-set Venn diagram for this Snakemake job.

    Inputs 0 and 1 are the two sets and input 2 is their overlap. The title
    is read from ``params["title"]`` and the figure is saved to output 0.
    """
    left_path = snakemake.input[0]
    right_path = snakemake.input[1]
    overlap_path = snakemake.input[2]

    diagram = venn2(
        subsets=venn_subsets(
            count_lines(left_path),
            count_lines(right_path),
            count_lines(overlap_path),
        ),
        # A two-set diagram takes two labels: one per set file.
        set_labels=(
            os.path.basename(os.path.normpath(left_path)),
            os.path.basename(os.path.normpath(right_path)),
        ),
    )

    # matplotlib-venn has no patch (None) for a region of size zero, so an
    # empty region is skipped instead of raising AttributeError.
    for region_id, colour in (("100", "blue"), ("010", "red"), ("110", "purple")):
        patch = diagram.get_patch_by_id(region_id)
        if patch is not None:
            patch.set_color(colour)

    plt.title(snakemake.params["title"])
    plt.savefig(snakemake.output[0])


# Snakemake executes script files as the main program, so the job still runs
# main(); the guard only keeps an import of this file free of side effects.
if __name__ == "__main__":
    main()
|
|
| eRNA_gene_groups |
2 |
- results/2019-08-26/hg19/GM_eRNA_gene_group.bed
- results/2019-08-26/hg19/IMR_eRNA_gene_group.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools window -w {params.window} -a {input.dges} -b {input.eRNA} > {output}
|
|
| eRNAs |
2 |
- results/2018-12-02/hg19/IMR_eRNA.bed
- results/2018-12-02/hg19/GM_eRNA.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -a {input.no_genes} -b {input.H3K27ac} {input.H3K4me1} -sorted -u -bed > {output} 2> {log}
|
|
| test_IMR_vs_GM |
1 |
- results/2018-11-10/test/hg19/IMR_eRNA_vs_GM_hg19.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -a {input.IMR} -b {input.GM19} -sorted -u > {output} 2> {log}
|
|
| eRNA_link_merge |
1 |
- results/2019-08-26/eRNA_merged.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| cat {input} | sort -k1,1 -k2,2n | bedtools merge -i stdin -s {params.col} > {output}
|
|
| eRNA_link_overlap |
1 |
- results/2019-08-26/eRNA_overlap_viral.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -wo -a {input.GM} -b {input.IMR} > {output}
|
|
| test_eRNA_check_L2 |
2 |
- results/2019-08-26/GM_L2.bed
- results/2019-08-26/IMR_L2.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools closest -a {input.l2} -b {input.eRNA} > {output}
|
|
| test_homer_check_L2 |
2 |
- results/2019-10-01/hg19/GM_genes_L2.bed
- results/2019-10-01/hg19/IMR_genes_L2.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools closest -a {input.l2} -b {input.homer} > {output}
|
|
| eRNA_ripgrep_id |
2 |
- results/2019-06-26/dge/rg/hg19/GM_de_ripgrep.bed
- results/2019-06-26/dge/rg/hg19/IMR_de_ripgrep.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| rg --dfa-size-limit 2G -w -f {input.inducibleId} {input.linkedeRNAs} > {output}
|
|
| removeGenes |
2 |
- results/2018-11-09/hg19/IMR_meta_transcripts_noGenes.bed
- results/2018-11-09/hg19/GM_meta_transcripts_noGenes.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| bedtools intersect -a {input.transcripts} -b {input.refseq} -v | sort -k1,1 -k2,2n - > {output} 2> {log}
|
|
| homer_meta_pos2bed |
2 |
- results/2018-11-07/hg19/GM_meta_transcripts.bed
- results/2018-11-07/hg19/IMR_meta_transcripts.bed
|
docker://continuumio/miniconda3:4.6.14 |
|
| pos2bed.pl {input} | sort -k1,1 -k2,2n - > {output}
|
|
| eRNA_Inducible_id |
2 |
- results/2019-08-26/dge/rg/GM_de_eRNA_id.txt
- results/2019-08-26/dge/rg/IMR_de_eRNA_id.txt
|
docker://continuumio/miniconda3:4.6.14 |
|
| awk -F "\t" '{{ if (NR!=1){{ print $1 }}}}' {input} > {output}
|
|
| hg19_meta_findPeaks |
2 |
- results/2018-11-07/hg19/GM_meta_transcripts.txt
- results/2018-11-07/hg19/IMR_meta_transcripts.txt
|
docker://emiller88/homer:latest |
|
| findPeaks {input.tagdir} -style groseq -o {output} -uniqmap {input.uniqmap}
|
|
| eRNA_viral_foldchange |
2 |
- results/2019-09-27/de/foldchange/GM_eRNA_foldchange.tsv
- results/2019-09-27/de/foldchange/IMR_eRNA_foldchange.tsv
|
docker://continuumio/miniconda3:4.6.14 |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 | #!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
import pandas as pd
# Time points later than 24h; they are left out of the fold-change screen.
LATE_TIMEPOINTS = ("GM48h", "GM72h")


def select_changed(tpm, threshold=1, skip_columns=LATE_TIMEPOINTS):
    """Return the rows of ``tpm`` that change by at least ``threshold``.

    The change is the relative change between neighbouring columns of the
    log2-transformed table. A row is kept when any remaining column is
    ``>= threshold`` (up) or ``<= -threshold`` (down). Each row is returned
    once, in its original order and with all of its original TPM columns,
    even when it is both up at one time point and down at another.

    Columns listed in ``skip_columns`` are ignored when present; missing
    names are not an error, so one absent column no longer prevents the
    other from being dropped.
    """
    fold = np.log2(tpm).pct_change(axis="columns")
    # The first column has no predecessor and is all-NaN. Any other column
    # that contains a NaN is dropped by this call as well.
    fold = fold.dropna(axis=1)
    fold = fold.drop(columns=list(skip_columns), errors="ignore")
    changed = ((fold >= threshold) | (fold <= -threshold)).any(axis=1)
    # Positional mask: ``fold`` keeps the row order of ``tpm``.
    return tpm.loc[changed.values]


def main():
    """Read the TPM table, keep the changed rows, and write them out."""
    tpm = pd.read_csv(snakemake.input[0], sep="\t", index_col="Geneid")
    select_changed(tpm).to_csv(snakemake.output[0], sep="\t")


# Snakemake executes script files as the main program, so the job still runs
# main(); the guard only keeps an import of this file free of side effects.
if __name__ == "__main__":
    main()
|
|
| eRNA_viral_tpm |
2 |
- results/2019-09-27/de/tpm/GM_eRNA_tpm.txt
- results/2019-09-27/de/tpm/IMR_eRNA_tpm.txt
|
docker://continuumio/miniconda3:4.6.14 |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 | import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def counts_to_tpm(counts_table):
    """Convert a count table to TPM, keeping rows non-zero in every sample.

    ``counts_table`` needs a ``Length`` column; every column whose name
    contains a digit is treated as a count column (presumably the
    per-sample columns -- confirm against the count table's header).

    Steps (https://www.rna-seqblog.com/rpkm-fpkm-and-tpm-clearly-explained/):
    counts are divided by the feature length, then each column is divided by
    its own sum / 1e6. A constant length unit cancels in that second step.
    Rows with a zero in any sample are removed from the result.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape.
    sample_counts = counts_table.filter(regex=r"\d", axis=1)
    per_length = sample_counts.divide(counts_table["Length"], axis="index")
    # The "per million" scaling factor of each sample.
    per_million = per_length.sum(axis=0) / 1e6
    tpm = per_length / per_million
    # Zeros become NaN under the mask, and dropna() removes those rows.
    return tpm[tpm > 0].dropna()


def main():
    """Read the count table, convert it to TPM, and write the result."""
    counts_table = pd.read_csv(snakemake.input[0], sep="\t", index_col="Geneid")
    print(counts_table)
    tpm = counts_to_tpm(counts_table)
    # Call head(); printing the bare attribute only shows the bound method.
    print(tpm.head())
    tpm.to_csv(snakemake.output[0], sep="\t")


# Snakemake executes script files as the main program, so the job still runs
# main(); the guard only keeps an import of this file free of side effects.
if __name__ == "__main__":
    main()
|
|
| GM19_eRNA_merge_counts |
1 |
- results/2019-06-03/hg19/counts/GM_eRNA_merged.txt
|
|
|
|
| IMR_eRNA_merge_counts |
1 |
- results/2019-06-03/hg19/counts/IMR_eRNA_merged.txt
|
|
|
|
| eRNA_feature_counts |
20 |
- results/2019-06-03/hg19/counts/per_sample/GM0h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM0h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM30min_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM30min_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM1h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM1h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM2h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM2h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM4h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM4h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM6h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM6h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM9h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM9h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM12h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM12h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM18h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM18h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM24h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM24h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM48h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM48h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/GM72h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/GM72h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR0h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR0h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR30min_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR30min_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR1h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR1h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR2h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR2h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR4h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR4h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR6h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR6h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR12h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR12h_eRNA.txt
- results/2019-06-03/hg19/counts/per_sample/IMR24h_eRNA.txt
- results/2019-06-03/hg19/qc/feature_counts/IMR24h_eRNA.txt
|
docker://continuumio/miniconda3:4.6.14 |
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 | """Snakemake wrapper for running featureCounts."""
__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "julianderuiter@gmail.com"
__license__ = "MIT"
from snakemake.shell import shell
# Build the log redirection (stderr only) for the shell command below.
# NOTE: `log` and `summary_path` keep their names because the shell()
# format strings refer to them.
log = snakemake.log_fmt_shell(stderr=True, stdout=False)

# Run featureCounts; the {...} placeholders are filled in by shell().
featurecounts_cmd = (
    "featureCounts"
    " {snakemake.params.extra}"
    " -a {snakemake.params.annotation}"
    " -o {snakemake.output.counts}"
    " -T {snakemake.threads}"
    " {snakemake.input.bam} {log}"
)
shell(featurecounts_cmd)

# The summary is expected next to the counts file as "<counts>.summary";
# move it when the rule declares a different summary output path.
expected_summary = snakemake.output.summary
summary_path = snakemake.output.counts + ".summary"
if summary_path != expected_summary:
    shell("mv {summary_path} {snakemake.output.summary}")
|
|
| eRNA_saf_viral |
1 |
- results/2019-09-27/eRNA_viral.saf
|
docker://continuumio/miniconda3:4.6.14 |
|
| #!/usr/bin/env python
import pandas as pd
# Column names assigned to the six BED fields, in file order.
BED6_COLUMNS = ["Chr", "Start", "End", "GeneID", "frame", "Strand"]
# Column order written to the SAF file.
SAF_COLUMNS = ["GeneID", "Chr", "Start", "End", "Strand"]


def read_bed6(source):
    """Read a header-less, tab-separated six-column BED file.

    ``header=None`` keeps the first record as data; without it pandas uses
    the first line as column names and that interval is lost. Assigning the
    names afterwards raises ValueError when the file does not have exactly
    six columns.
    """
    bed = pd.read_csv(source, sep="\t", header=None)
    bed.columns = BED6_COLUMNS
    return bed


def bed_to_saf(bed):
    """Return the SAF columns of ``bed`` in SAF order (``frame`` is dropped).

    NOTE(review): coordinates are copied unchanged; BED starts are normally
    0-based while SAF is usually described as 1-based -- confirm whether a
    +1 shift on Start is wanted.
    """
    return bed[SAF_COLUMNS]


def main():
    """Convert the BED input of this Snakemake job into a SAF file."""
    saf = bed_to_saf(read_bed6(snakemake.input[0]))
    saf.to_csv(snakemake.output[0], sep="\t", index=False)


# Snakemake executes script files as the main program, so the job still runs
# main(); the guard only keeps an import of this file free of side effects.
if __name__ == "__main__":
    main()
|
|